% Created 2019-05-13 一 10:40
% Intended LaTeX compiler: pdflatex
\documentclass[11pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{grffile}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{textcomp}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{minted}
\usepackage{tikz,qtree}
\usepackage{forest,syntax}
\input{preamble.tex}
\graphicspath{{../../images/Compiler/}}
\author{wugouzi}
\date{\today}
\title{Compiler}
\hypersetup{
 pdfauthor={wugouzi},
 pdftitle={Compiler},
 pdfkeywords={},
 pdfsubject={},
 pdfcreator={Emacs 26.1 (Org mode 9.1.14)}, 
 pdflang={English}}
\begin{document}

\maketitle
\tableofcontents


\section{Chap1 introduction}
\label{sec:org5c7e3d8}
source code -> scanner -> [tokens] -> parser -> [syntax tree] -> semantic
analyzer -> [annotated tree] -> source code optimizer -> [intermediate code]
-> code generator -> [target code] -> target code optimizer -> [target code]

\section{chap2 scanning}
\label{sec:org447eeee}
source code(\textbf{character stream}) -> \textbf{token stream}. Use regular expression.
a ε R|S RS R* R+=R(R*) R?=(R|ε) [abce]=(a|b|c|e) [a-z] [\(^{\text{az}}\)]=anything but
one of the listed chars

comment "\emph{*"([\(^{\text{*}}\)/]|[\(^{\text{*}}\)]"}"|"\textbf{"[\^{}/])}"*/"


finite automata

Thompson's construction


Minimizing the number of states in a DFA
\begin{enumerate}
\item it begins with the most optimistic assumptions possible: it create two sets
\begin{itemize}
\item one consisting of all the accepting states
\item the other consisting of all the nonaccepting states
\end{itemize}
\item given this partition of the states of the original DFA, consider the
transitions on \textbf{each character} a of the alphabet
\begin{itemize}
\item if all accepting states have transitions on a to accepting states,
defines an a-transition from the new accepting state to itself
\item if all accepting states have transitions on a to nonaccepting states \ldots{}
\end{itemize}
\item given this partition of the states of the original DFA, consider the
transitions on each character a of the alphabet
\begin{itemize}
\item if there are two accepting states s and t that have transitions on a that
land in different sets, no a-transition can be defined for this grouping
of the states. a distinguish the states s and t
\item if there are two accpeting states s and t s.t. s has an a-transition to
another accepting state, while t has no a-transition at all. a
distinguish s and t
\end{itemize}
\end{enumerate}

\section{Chap3 context-free grammars and parsing}
\label{sec:orgd85d981}
\subsection{context-free grammars}
\label{sec:orgddd181d}
A context-free grammar involves recursion rules. 4-tuple \((V,\Sigma,S,\to)\).
V nonterminal. Σ terminal. S start symbol. \(\to\subset
   V\times(V\cup\Sigma)*\)

\textbf{Left recursive}: the nonterminal A appears as the first symbol on the
right-hand side of the rule defining A

\textbf{Right recusive}:

\textbf{ε-production}: empty->ε
A grammar that generates a language containing the empty string must have at
least one \textbf{ε-production}
\subsection{Parse tree and abstract syntax trees}
\label{sec:org119e65c}
\subsubsection{parse tree}
\label{sec:orgdb8e2f8}
A \textbf{parse tree} corresponding to a derivation is a labeled tree
\begin{itemize}
\item the interior nodes are labeled by \textbf{nonterminals}
\item the leaf is \textbf{terminals}
\end{itemize}

\textbf{left-most derivation}
\subsubsection{abstract symtax tree}
\label{sec:org924b930}
\begin{forest}
[+ [3] [4] ]
\end{forest}
\subsection{Ambiguity}
\label{sec:orgd8c94a2}
\subsubsection{ambiguity grammars}
\label{sec:org9115d18}
\textbf{ambiguous grammar}: a grammar that generates a string with two distinct parse
trees

Two basic methods:
\begin{enumerate}
\item A rule: that specifies in each ambiguous case which of the parse trees is
the correct one. \textbf{disambbiguating rule}
\begin{itemize}
\item associativity
\end{itemize}
\item change the grammar
\end{enumerate}
\subsubsection{precedence and associativity}
\label{sec:org023ef79}
A \emph{left recursive} rule makes its operators associate on the left
\subsubsection{the dangling else problem}
\label{sec:orgd83d0d4}
\begin{grammar}
<statement> ::= <if-stmt>
\alt `other'

<if-stmt> ::= `if' `(' <exp> `)' <statement>
\alt `if' `(' <exp> `)' <statement> `else' <statement>
\end{grammar}

disambiguating rule is \textbf{most closely nested rule}.
grammar is
\begin{grammar}
<statement> -> <matched-stmt>
\alt <unmatched-stmt>

<matched-stmt> -> `if' `(' <exp> `)' <matched-stmt> `else' <matched-stmt>
\alt `other'

<unmatched-stmt> -> `if' `(' <exp> `)' <statement>
\alt `if' `(' <exp> `)' <matched-stmt> `else' <unmatched-stmt>

<exp> -> `0'
\alt `1'
\end{grammar}
\subsubsection{inessential ambiguity}
\label{sec:org8f80f04}
sometimes a grammar may be ambiguous and yet always produce unique \emph{abstract}
\emph{syntax tree}.

\textbf{inessential ambiguity}: the associated semantics don't depend on what
 disambiguating rule is used
\subsubsection{extended notations: EBNF and syntax diagrams}
\label{sec:orgce9ee75}
\(A\to A\;\alpha\mid\beta\Longrightarrow A\to\beta\;\{\alpha\}\).
\(A\to\alpha\; A\mid\beta\Longrightarrow A\to\{\alpha\}\;\beta\)
\subsection{Formal properties of context-free language}
\label{sec:org8e4b1c9}
A context-free grammar consists of the following
\begin{enumerate}
\item T terminals
\item N nonterminals
\item P grammar rules
\item S start symbol
\end{enumerate}

\textbf{sentential form} a string a in \((T\cup N)*\)

A grammar G is \textbf{ambiguous} if there exists a string \(w\in L(G)\) s.t. w has two
distinct parse trees

\section{Chap4 top-down parsing}
\label{sec:org9210e15}
\subsection{Top-down parsing by recursive-descent}
\label{sec:org4af043b}
not easy and use EBNF
\begin{grammar}
<if-stmt> ::= `if' `(' <exp> `)' <statement>
\alt `if' `(' <exp> `)' <statement> `else' <statement>
\end{grammar}
to if-stmt -> if (exp) statement [else statement]
\subsection{LL(1) parsing}
\label{sec:orge39a342}
use an explicit stack rather than recursive calls.
\begin{grammar}
<S> ::= <E> `+' <S>
\alt <E>

<E> ::= `num'
\alt `(' <S> `)'
\end{grammar}
.
\begin{center}
\begin{tabular}{lrlr}
partly-derived string & lookahead & parsed part & unparsed part\\
\hline
S & ( &  & (1+2+(3+4))+5\\
E+S & ( &  & (1+2+(3+4))+5\\
(S)+S & 1 & ( & 1+2+(3+4))+5\\
(E+S)+S & 1 & ( & 1+2+(3+4))+5\\
(1+S)+S & 2 & (1+ & 2+(3+4))+5\\
(1+E+S)+S & 2 & (1+ & 2+(3+4))+5\\
(1+2+S)+S & ( & (1+2+( & (3+4))+5\\
\end{tabular}
\end{center}

For \(S\to(S)\; S\mid\epsilon\)
\begin{center}
\begin{tabular}{rlll}
step & parsing & input & action\\
\hline
1 & \$S & ()\$ & S->(S)S\\
2 & \$S)S( & ()\$ & match\\
3 & \$S)S & )\$ & S->e\\
4 & \$S) & )\$ & match\\
5 & \$S & \$ & S->e\\
6 & \$ & \$ & match\\
\end{tabular}
\end{center}

Two actions:
\begin{enumerate}
\item generate
\item match: match a token on top of the stack with the next input token
\end{enumerate}

This corresponds to the leftmost derivation. \textbf{characteristic of top-down
parsing}
\subsubsection{LL(1) parsing table}
\label{sec:orgf6de9fe}
parsing table
\begin{center}
\begin{tabular}{llll}
M[N,T] & ( & ) & \$\\
\hline
S & S->(S)S & S->e & S->e\\
\end{tabular}
\end{center}

M is the set of non-terminals.
T is the set of terminals or tokens including \$

Table-constructing rule:
\begin{enumerate}
\item if \(A\to\alpha\) is a production choice and there is a derivation
\(\alpha\Rightarrow*a\beta\) where a is a token then add \(A\to\alpha\) to
M[A,a]
\item if \(A\to\alpha\) and
\(\alpha\Rightarrow*\epsilon,S\textdollar\textsterling\Rightarrow^*\beta
      Aa\gamma\), where S is the start symbol and a is a token(or \$), then add
\(A\to\alpha\) to M[A,a]
\end{enumerate}

A grammar is LL(1) if LL(1) parsing table has at most one production in each entry
\subsubsection{left recursion removal and left factoring}
\label{sec:org1b21d4b}
\textbf{left recursion removal}
\begin{itemize}
\item \textbf{immediate left recursion}: \(exp\to exp\;+\;term|exp\;-\;term|term\)
\item \textbf{indirect left recursion}: \(A\to Bb\) and \(B\to Aa\)
\end{itemize}


\begin{enumerate}
\item Simple immediate left recursion.
\(A\to A\alpha|\beta\) to \(A\to\beta\;A'\) and \(A'\to\alpha\;A'|\epsilon\)
\item general immediate left recursion.
\(A\to A\alpha_1|\dots|A\alpha_n|\beta_1|\dots|\beta_m\) to
\(A\to\beta_1A'|\dots|\beta_mA'\) and
\(A'\to\alpha_1A'|\dots|\alpha_nA'|\epsilon\)
\item general left recursion. grammars with no \$\(\epsilon\)\$-productions and no cycles
\end{enumerate}


doesn't change language, but changes the grammar and parse tree

\textbf{left factoring}.
\(A\to\alpha\beta|\alpha\gamma\) to \(A\to\alpha A'\) and \(A'\to\beta\mid\gamma\)
\subsubsection{Syntax tree construction in LL(1) parsing}
\label{sec:org4ed33b9}
\subsection{First and follow sets}
\label{sec:org21822a4}
X a grammar symbol(a terminal or non-terminal) or \(\epsilon\). Then First(X)
is
\begin{enumerate}
\item if X is a terminal or \(\epsilon\), then First(X)=\{X\}
\item if X is a non-terminal, for each \(X\to X_1X_2\dots X_n\), First(X) contains
First(X1) - \{e\}
\end{enumerate}

A non-terminal A is \textbf{nullable} iff there exists \(A\Rightarrow^*\epsilon\) iff
First(A) contains \(\epsilon\)

Follow(A) is
\begin{enumerate}
\item if A is start symbol, \$ is in Follow(A)
\item if \(B\to\alpha A\gamma\), then
\(\text{First}(\gamma)-\{\epsilon\}\subseteq\text{Follow}(A)\)
\item if \(B\to\alpha A\gamma, \epsilon\in\text{First}(\gamma)\), then Follow(A)
contains Follow(B)
\end{enumerate}
\subsection{Error recovery in top-down parsers}
\label{sec:org26332f8}
\section{Chap5 Bottom-up parsing}
\label{sec:org8ca7ac0}
\subsection{Overview of bottom-up parsing}
\label{sec:orgdd58817}
\begin{itemize}
\item A bottom-up parser uses an \textbf{explicit stack} to perform a parse
\item The parsing stack will contain both tokens and nonterminals
\begin{center}
\begin{tabular}{ll}
\$ & inputstring \$\\
\hline
\ldots{} & \ldots{}\\
\$StartSymbol & \$accept\\
\end{tabular}
\end{center}
\item \textbf{right-most} derivation -- backward
start with the tokens; end with the start symbol
\begin{center}
\begin{tabular}{l}
(1+2+(3+4))+5\\
(E+2+(3+4))+5\\
(S+2+(3+4))+5\\
(S+E+(3+4))+5\\
(S+(3+4))+5\\
(S+(E+4))+5\\
(S+(S+4))+5\\
(S+(S+E))+5\\
(S+(S))+5\\
(S+E)+5\\
(S)+5\\
E+5\\
S+5\\
S+E\\
S\\
\end{tabular}
\end{center}
\item \textbf{parsing actions}: a sequence of \textbf{shift} and \textbf{reduce} operations
\textbf{parser state}: a stack of terminals and non-terminals
\textbf{current derivation step} = always stack + input
\begin{center}
\begin{tabular}{llr}
derivation & step stack & unconsumed input\\
\hline
(1+2+(3+4))+5 &  & (1+2+(3+4))+5\\
 & ( & 1+2+(3+4))+5\\
(E+2+(3+4))+5 & (E & +2+(3+4))+5\\
(S+2+(3+4))+5 & (S & +2+(3+4))+5\\
 & (S+ & 2+(3+4))+5\\
 & (S+2 & +(3+4))+5\\
(S+E+(3+4))+5 & (S+E & +(3+4))+5\\
\end{tabular}
\end{center}
\item 1. \textbf{shift}: shift a terminal from the front of the input to the top of the
stack
\begin{enumerate}
\item \textbf{reduce}: reduce a string α at the top of the stack to a nonterminal A,
given the BNF choice A ⟶ α
\end{enumerate}

A bottom-up parser: \textbf{shift-reduce parser}
\item One further feature of bottom-up parsers： grammars are always augmented
with a \textbf{new start symbol}. if S is the start symbol, a new start symbol S' is
added to the grammar :  S' →S

\item example

S'->S

S ->(S)S|e

S'=>S=>(S)S=>(S)=>()
\begin{center}
\begin{tabular}{rlrl}
 & Parsing stack & Input & Action\\
\hline
1 & \$ & ( ) \$ & Shift\\
2 & \$ ( & ) \$ & Reduce  S -> ε\\
3 & \$ (S & ) \$ & Shift\\
4 & \$ (S ) & \$ & Reduce  S -> ε\\
5 & \$ (S ) S & \$ & Reduce S --> (S) S\\
6 & \$S & \$ & Reduce S'--> S\\
7 & \$S' & \$ & Accept\\
\end{tabular}
\end{center}

\item example

E'->E

E->E+n|n

E'=>E=>E+n=>n+n
\begin{center}
\begin{tabular}{rlrl}
 & Parsing stack & Input & Action\\
1 & \$ & n+n\$ & Shift\\
2 & \$n & +n\$ & Reduce  E->n\\
3 & \$E & +n\$ & Shift\\
4 & \$E+ & n\$ & Shift\\
5 & \$E+n & \$ & Reduce E->E+n\\
6 & \$E & \$ & Reduce E'->E\\
7 & \$E' & \$ & Accept\\
\end{tabular}
\end{center}
\item[{Right sentential form}] \begin{itemize}
\item A \textbf{sentential} form is any string derivable from the start symbol. Note
that this includes the forms with non-terminals at intermediate steps as
well.
\item A \textbf{right-sentential form} is a sentential form that occurs in a step of
rightmost derivation (RMD).
Each of the intermediate strings of terminals and nonterminals in such
a derivation is called a right sentential form
Each such sentential form is split between the parsing stack and the input
during a shift-reduce parse
\item A \textbf{sentence} is a sentential form consisting only of terminals
\end{itemize}

E,E+,E+n are \textbf{viable prefixes} of the right sentential form E+n.
The sequence of symbols on the parsing stack is called \textbf{viable prefix} of the
right sentential form
\item \textbf{handle}
This string, together with the \textbf{position} in the right sentential form where it
occurs, and the production used to reduced it, is called the \textbf{handle} of the right
sentential form

\uline{determining the next handle in a parse is the main task of a shift-reduce parser}
\end{itemize}
\subsection{Finite automata of LR(0) items and LR(0) parsing}
\label{sec:org6fe0ae0}
\begin{itemize}
\item An \textbf{LR(0) item} of a context-free grammar: a production choice with a
distinguished position in its right-hand side
\item If \textbf{A -> α}, \textbf{βγ = α}, then \textbf{A -> β · γ} is an LR(0) item
\item Example
\begin{center}
\begin{tabular}{l}
S' -> S\\
S -> (S)S $\backslash$ e\\
S' -> ·S\\
S' -> S·\\
S -> ·(S)S\\
S -> (·S)S\\
S -> (S·)S\\
S -> (S)·S\\
S -> (S)S·\\
S -> ·\\
\end{tabular}
\end{center}
\end{itemize}
\subsubsection{Finite automata of items}
\label{sec:org035fd03}
\begin{itemize}
\item The LR(0) items: as the state of a finite automata
\item construct the DFA of sets of LR(0) using the subset construction from NFA
\item If X is a token or a nonterminal

\begin{tikzpicture}
[place/.style={circle,minimum size=5mm}]
\node (x1) at (0,0) [place] {$A\to\alpha\cdot X\eta$};
\node (x2) at (5,0) [place] {$A\to\alpha X\cdot\eta$};
\draw [->] (x1) to node [above] {X} (x2);
\end{tikzpicture}
\item If X is a token, then this transition corresponds to a shift of X from the
input to the top of the stack during a parse
\item if X is a nonterminal,
X will never appear as an input symbol

\begin{tikzpicture}
\node (x1) at (0,0) [circle] {$A\to\alpha\cdot X\eta$};
\node (x2) at (5,0) [circle] {$X\to\cdot\beta$};
\draw [->] (x1) to node [above] {$\epsilon$} (x2);
\end{tikzpicture}
\item The \textbf{start state} of the NFA ↔ the \textbf{initial state} of the parser: the stack is
empty
\item the solution is to augment the grammar by a single production S' -> S
\item \textbf{S'->·S} the \textbf{start state} of the NFA
\end{itemize}
\subsubsection{The LR(0) parsing algorithm}
\label{sec:org4748a59}
\begin{itemize}
\item the parsing stack to store: \textbf{symbols} and \textbf{state numbers}
\item pushing the new \textbf{state number} onto the parsing stack after each push of \textbf{a
symbol}
\item Let s be the current state. Then actions are
\begin{enumerate}
\item if state s contains any item of the form \textbf{A -> α·Xβ} (X is a terminal).
Then the action is to shift the current input token onto the stack
\item If state s contains any \textbf{complete item} (an item of the form \textbf{A->γ·}),
then the action is to reduce by the rule \textbf{A->γ·}
\begin{itemize}
\item A \textbf{reduction} by the rule \textbf{S'->S} where S' is the start state
\item \textbf{acceptance} if the input is empty
\item \textbf{Error} if the input is not empty
\end{itemize}
\end{enumerate}
\item A grammar is \textbf{LR(0)} grammar if the above rules are unambiguous
\item A grammar is \textbf{LR(0)} iff
\begin{itemize}
\item Each state is a shift state
\item A reduce state containing a single complete item
\end{itemize}
\item table
\begin{center}
\begin{tabular}{rllrrlr}
state & action & rule & input & input & input & goto\\
\hline
 &  &  & ( & a & ) & A\\
0 & shift &  & 3 & 2 &  & 1\\
1 & reduce & A'->A &  &  &  & \\
2 & reduce & A->(A) &  &  &  & \\
3 & shift &  & 3 & 2 &  & 4\\
4 & shift &  &  &  & 5 & \\
5 & reduce & A->a &  &  &  & \\
\end{tabular}
\end{center}
\end{itemize}
\subsection{SLR(1) Parsing (simple LR(1))}
\label{sec:org5d76fb0}
\begin{itemize}
\item \textbf{definition}
\begin{enumerate}
\item if state s contains any item of form \(A\to\alpha\cdot X\beta\), then the
action is to shift the current input token onto the stack, and the new
state to be pushed on the stack is the state containing the item
\(A\to\alpha\cdot X\beta\)
\item if state s contains the complete item \(A\to\gamma\cdot\), and \uline{the next token in}
\uline{the input string is in Follow(A)}, then the action is to reduce by the
rule \(A\to\gamma\)
\begin{itemize}
\item A reduction by the rule \textbf{S'->S} where S' is the start state, this will
happen only if the next input token is \$
\item remove the string γ and all of its corresponding states from the parsing
stack
\item back up in the DFA to the state from which the construction of γ begin
\item this state must contain an item of the form \(B\to\alpha\cdot A\beta\).
Push A to the stack, and push the state containing the item
\(B\to\alpha\cdot A\beta\)
\end{itemize}
\item if the next input token is s.t. neither of the above two cases applies,
an error is declared
\end{enumerate}
\item A grammar is \textbf{SLR(1)} iff for any state s
\begin{enumerate}
\item for any item \(A\to\alpha\cdot X\beta\) in s with X a terminal, there is no \uline{complete}
\uline{item} \(B\to\gamma\cdot\) in s with X ∈ Follow(B)
\item For any two complete item \(A\to\alpha\cdot\) and \(B\to\beta\cdot\) in s,
\(\text{Follow}(A)\cap\text{Follow(B)}=\emptyset\)
\end{enumerate}
\item right recursion can cause stack overflow
\end{itemize}
\subsubsection{disambiguating rules for parsing conflicts}
\label{sec:org56fe75e}
\begin{itemize}
\item two kinds of parsing conflicts in SLR(1) parsing
\textbf{shift-reduce} conflicts
\textbf{reduce-reduce} conflicts
\item in the case of shift-reduce conflicts, there is a natural
\textbf{disambiguaiting rule}: \uline{always prefer shift over the reduce}
\item 
\end{itemize}
\subsubsection{limits of SLR(1) parsing power}
\label{sec:orge0c5fe6}
\subsection{General LR(1) and LALR(1) parsing}
\label{sec:orgd93ca88}

\begin{itemize}
\item the difficulty with the SLR(1) method:
applies lookaheads after the construction of the DFA of LR(0) items
\item An \textbf{LR(1)} item is a pair consisting of an \textbf{LR(0)} item and a \textbf{lookahead} token
\item \textbf{LR(1)} item as
\textbf{[A->α·β, a]}
A->α·β is LR(0) item, a is a token
\item \textbf{definition of LR(1) transitions} main difference of LR(0) and LR(1)
\textbf{[A->α·Xγ, a]}, X is any symbol, there is a transition on X to
\textbf{[A->αX·γ,a]}
\textbf{[A->α·Bγ,a]}, B nonterminal, there are ε-transitions to items \textbf{[B->·β,b]}
for every \textbf{B->β} and for every token b in \textbf{First(γa)}
\end{itemize}
\subsubsection{Finite automata of LR(1) items}
\label{sec:org67b1534}
\begin{itemize}
\item \textbf{start} state
S'->S
\item start item

\textbf{[S'->·S, \$]}
\end{itemize}
\subsubsection{The LR(1) parsing algorithm}
\label{sec:org0ef317f}
\begin{itemize}
\item the general LR(1) parsing algorithm
Let s be the current state.

\begin{enumerate}
\item s:[A->α·Xβ,a], X terminal, X is the next token in the input string \textbf{shift}
\item s: [A->α·,a], the next token in the input string is a \textbf{reduce}
\item otherwise error
\end{enumerate}
\item A grammar is \textbf{LR(1)} iff for any state s
\begin{enumerate}
\item for any item \textbf{[A->α·β,a]} in s with X a terminal, there is no item in s
of the form \textbf{[B->γ·,X]} (otherwise there is a \uline{shift-reduce} conflict
\item there are no two item in s of the form \textbf{[A->α·,a]} and \textbf{[B->β·,a]}
\end{enumerate}
\end{itemize}
\subsection{LALR(1) parsing}
\label{sec:org572ade3}
\begin{itemize}
\item the size of the DFA of sets of LR(1) items is too large
\item first principle of LAIR(1) parsing
the core of a state of DFA of LR(1) is a state of the DFA of LR(0) items
\item second principle of LAIR(1) parsing
s₁,s₂ of DFA of LR(1) that have the same core, suppose there is a transition
on the symbol X from s₁ to a state t₁, then there is also a transition on X
from state s₂ to a state t₂, and the states t₁ and t₂ have the same core
\item if a grammar is LR(1) then the LALR(1) parsing table cannot have any
shift-reduce conflicts, there may be reduce-reduce conflicts
\item if a grammar is SLR(1), then it's LALR(1)
\item compute the DFA of LALR(1) items directly from the DFA of LR(0) items through
a process of \textbf{propagating lookaheads}
\end{itemize}
\subsection{Error recovery in Bottom-up parsers}
\label{sec:org984491d}
A bottom-up parser will detect an error when a blank entry is detected
\section{chap6 semantics analysis}
\label{sec:org8d257a5}
\subsection{Attributes and attribute grammars}
\label{sec:org1a58c37}
\textbf{attribute}: any property of a programming language constructs. May be fixed prior to
the compilation process or be only determinable during program execution

\textbf{binding} of the attribute: the process of computing an attribute and associating its
computed value with the language construct in question

\textbf{binding time}: the time during the compilation/execution process when the binding of
an attribute occurs

\textbf{static attributes/dynamic attributes}: based on the difference of the binding time

\textbf{type checker}: an analyzer.
computes the data type attribute of all language entities for which
data types are defined. And verifies that these types conform to the type rules of
the language

\textbf{type checking}: set of rules that ensure the type consistency of different constructs
in the program. e.g. operands types and so on
\subsubsection{attribute grammars}
\label{sec:org651029e}
\begin{itemize}
\item \(X.a\): the value of a associated to X

\(X\) is a grammar symbol and \(a\) is an attribute associated to \(X\)
\item \textbf{syntax-directed semantics}: attributes are associated directly with the grammar
symbols of the language
\item given attributes \(a_1, a_2,...,a_k\) for each grammar rule
\(X_0\to X_1\dots X_n\), the values of
the attributes \(X_i.a_j\) of each grammar symbol \(X_i\) are related to the values of the
attributes of the other symbols in the rule
\item an \textbf{attribute grammar}

\(X_i.a_j=f_{ij}(X_0.a_1,\dots,X_0.a_k,\dots,X_n.a_1,\dots,X_n.a_k)\)
\item example

For
\begin{grammar}
<number> ::= <number> <digit> \alt <digit>

<digit> ::= `[0123456789]'
\end{grammar}
\begin{center}
\begin{tabular}{ll}
grammar rule & semantic rules\\
\hline
\(number1 \to number2 \; digit\) & \(number1.val=number2.val\times 10+digit.val\)\\
\(number\to digit\) & \(number.val=digit.val\)\\
\(digit\to 0\) & \(digit.val=0\)\\
\end{tabular}
\end{center}
\end{itemize}


\begin{forest}
qtree,
[{number\\($val=34*10+5=345$)}
 [{number\\($val=3*10+4=34$)}
  [{number\\$(val=3)$}
   [{digit\\$(val=3)$}
    [3]
   ]
  ]
  [{digit\\$(val=4)$} [4]]
 ]
 [{digit\\$(val=5)$} [5]]
]
\end{forest}
\subsubsection{simplifications and extensions to attribute grammars}
\label{sec:org57658fd}
\begin{itemize}
\item \textbf{metalanguage} for the attribute grammar: the collection of expressions allowable in
an attribute equation
\item \textbf{functions} can be added to the metalanguage whose definitions may be given elsewhere
\item \textbf{simplifications}
\begin{enumerate}
\item using ambiguous grammar
\item using abstract syntax tree instead of parse tree
\end{enumerate}
\end{itemize}

\subsection{Algorithms for attribute computation}
\label{sec:org5e65499}

\begin{itemize}
\item an edge from Xₘ.aₖ to Xᵢ.aⱼ expressing the dependency of Xᵢ.aⱼ on Xₘ.aₖ
\end{itemize}
\subsubsection{dependency graphs and evaluation order}
\label{sec:org91eed1d}
\begin{itemize}
\item each grammar rule choice has an \textbf{associated dependency graph}
\item \(X_i.a_j=f_{ij}(\dots,X_m.a_k,\dots)\)

an edge from each \(X_m.a_k\) to \(X_i.a_j\)

\ttfamily
\begin{tikzpicture}
[level 1/.style={sibling distance=20mm},
 level 2/.style={sibling distance=20mm},<-,baseline]
  \node {Num.val}
  child {node {Number.val}
    child {node {number.val}
      child {node {Digit.val}}}
    child {node {Digit.val}}}
  child {node {Digit.val}};
\end{tikzpicture}
\rmfamily
\item another example
\begin{grammar}
<decl> ::= <type> <var-list>

<type> ::= `int' \alt `float'

<var-list> ::= `id' `,' <var-list> \alt `id'
\end{grammar}

\begin{center}
\begin{tabular}{ll}
grammar Rule & semantic Rules\\
\hline
\(decl\to type\;var-list\) & \(var-list.dtype = type.dtype\)\\
\(type \to int\) & \(type.dtype = integer\)\\
\(type \to float\) & \(type.dtype = real\)\\
\(var-list1\to id,\;var-list2\) & \(id.dtype = var-list1.dtype\)\\
 & \(var-list2.dtype = var-list1.dtype\)\\
\(var-list \to id\) & \(id.dtype = var-list.dtype\)\\
\end{tabular}
\end{center}

\includegraphics[width=100mm]{DeclDependencyGraph.png}
\item \textbf{directed acyclic graphs} DAG
topological sort
\end{itemize}

How attribute values are found at the roots of the graph
\begin{itemize}
\item \textbf{Parse tree method}: construction of the dependency graph is based on the
specific parse tree at compile time, add complexity and need circularity
detective
\item \textbf{Rule based method}: fix an order for attribute evaluation at compiler
construction time. It depends on an analysis of the attribute equations, or
semantic rules
\end{itemize}
\subsubsection{synthesized and inherited attributes}
\label{sec:org9d7e5e8}
\begin{itemize}
\item \textbf{synthesized attributes}
\begin{itemize}
\item an attribute is synthesized if all its dependencies point from child to parent in
the parse tree
\item \textbf{S-attributed grammar}

an attribute grammar where all the attributes are synthesized
\end{itemize}
\item \textbf{inherited attributes}

inheritance from parent to siblings, from siblings to siblings.
\end{itemize}
\subsubsection{attributes as parameters and returned values}
\label{sec:org2c6e00f}
\subsubsection{The use of external data structures to store attributes values}
\label{sec:orgd977cee}
\begin{itemize}
\item Applicability
\begin{itemize}
\item Not suitable to the method of \textbf{parameters} and \textbf{returned values}
\item particularly when the attribute values have significant structure
and may be needed at arbitrary points during translation
\item Not reasonable to be stored in the syntax tree nodes
\end{itemize}
\item Ways:
\begin{itemize}
\item external data structures: table, graphs and other data structures. One
of the prime examples is the symbol table
\item replace attribute equations by calls to procedures representing
operations on the appropriate data structure used to maintain the
attribute values
\end{itemize}
\end{itemize}
\subsubsection{The computation of attributes during parsing}
\label{sec:orge9df96c}
\begin{itemize}
\item \textbf{L-attributed}
\begin{itemize}
\item An attribute grammar of \(a_1,\dots,a_k\) is \textbf{L-attributed} if for each
inherited attribute \(a_j\) and each grammar rule \(X_0\to X_1\dots X_n\)
the associated equations for a\(_{\text{j}}\) are

\(X_i.a_j=f_{ij}(X_0.a_1,\dots,X_0.a_k,X_1.a_1,\dots,X_1.a_k,\dots,X_{i-1}
        .a_1,\dots,X_{i-1}.a_k)\)
\end{itemize}
\item \textbf{S-attributed grammar} is L-attributed
\item given an \emph{L-attributed} grammar where the \emph{inherited} attributes don't depend
on the \emph{synthesized} attributes
\begin{enumerate}
\item \textbf{Top-down parser}: a recursive-descent parser can evaluate all the
attributes by turning the inherited attributes into parameters and
synthesized attributes into returned values.
\item \textbf{Bottom-up parser}: LR parsers are suited to handling primarily
synthesized attributes, but are difficult for inherited attributes
\end{enumerate}
\item \(A\to B\;C\quad C.i=f(B.s)\) \emph{s} is a \emph{synthesized} attribute

\begin{center}
\begin{tabular}{ll}
Grammar Rule & Semantic Rules\\
\hline
\(A\to BDC\) & \\
\(B\to\dots\) & compute \(B.s\)\\
\(D\to\epsilon\) & \(saved_i=f(valstack[top])\)\\
\(C\to\dots\) & \(saved_i\) is available\\
\end{tabular}
\end{center}
\end{itemize}
\subsubsection{The dependence of attributes computation on the syntax}
\label{sec:org455710d}
\textbf{Theorem}. Given an attribute grammar , \uline{all inherited attributes can be
changed into synthesized attributes} by suitable modification of the grammar,
without changing the language of the grammar. (Knuth[1968])


\subsection{The Symbol Table}
\label{sec:org15a33f7}
\textbf{semantic checks} refer to properties of identifiers in the program - their
scope or type

\begin{center}
\begin{tabular}{llll}
NAME & KIND & TYPE & ATTRIBUTES\\
\hline
foo & fun & int * int -> bool & extern\\
\end{tabular}
\end{center}
\subsubsection{The structure of the symbol table}
\label{sec:orgdfe6253}
\begin{enumerate}
\item Linear list
\item Various search tree structures

AVL, B tree
\item hash tables

best choice

Collision resolution
\begin{enumerate}
\item open addressing
\item separate chaining
\end{enumerate}

The process of the hash function \(f:\Sigma^*\to\mathbb{N}/(size-1)\mathbb{N}\)

Good solution: repeatedly use a constant \(\alpha\) as multiplying factor

\(h_{i+1}=\alpha h_i+c_i, \quad h_0 = 0\)

Final hash value \(h=h_n\mod size\). Typically \(\alpha\) is a power of 2
\end{enumerate}
\subsubsection{Declarations}
\label{sec:org386c90a}
\begin{itemize}
\item constant declarations
\item type declarations
\item variable declarations
\item procedure/function declarations
\end{itemize}
\subsubsection{Scope rules and block structure}
\label{sec:orgd702406}
two rules
\begin{itemize}
\item Declaration before use
\item the most closely nested rule for block structure
\end{itemize}
\subsubsection{interaction of same-level declarations}
\label{sec:orgc31c840}
\subsubsection{an extended example of an attribute grammar using a symbol table}
\label{sec:org764be4f}
\begin{grammar}
<S> ::= <exp>

<exp> ::= `(' <exp> `)' \alt  <exp> `+' <exp>
\alt `id' \alt `num' \alt `let' <dec-list> `in' <exp>

<dec-list> ::= <dec-list> `,' <decl> \alt <decl>

<decl> ::= `id' `=' <exp>
\end{grammar}

Three attributes
\begin{itemize}
\item \texttt{err}: synthesize attribute. represent error
\item \texttt{symbol}: inherited attribute. represent the symbol table
\item \texttt{nestlevel}: inherited attribute, nonnegtive integer. represent the current
nesting level of the let blocks
\end{itemize}
\ttfamily
\begin{center}
\begin{tabular}{ll}
Grammar Rule & Semantic Rules\\
\hline
\emph{S \(\to\) exp} & exp.symtab = emptytable\\
 & exp.nestlevel = 0\\
 & S.err = exp.err\\
\hline
\emph{exp1 \(\to\) exp2+exp3} & exp2.symtab=exp1.symtab\\
 & exp3.symtab=exp1.symtab\\
 & exp2.nestlevel=exp1.nestlevel\\
 & exp3.nestlevel=exp1.nestlevel\\
 & exp1.err = exp2.err or exp3.err\\
\hline
\emph{exp1 \(\to\) (exp2)} & exp2.symtab =exp1.symtab\\
 & exp2.nestlevel =exp1.nestlevel\\
 & exp1.err = exp2.err\\
\hline
\emph{exp \(\to\) id} & exp.err = not isin(exp.symtab, id.name)\\
\hline
\emph{exp \(\to\) num} & exp.err = false\\
\hline
\emph{exp1 \(\to\) let dec-list in exp2} & dec-list.intab=exp1.symtab\\
 & dec-list.nestlevel=exp1.nestlevel+1\\
 & exp2.symtab=dec-list.outtab\\
 & exp2.nestlevel=dec-list.nestlevel\\
 & exp1.err = (dec-list.outtab=errtab) or exp2.err\\
\hline
\emph{dec-list1 \(\to\) dec-list2,decl} & dec-list2.intab= dec-list1.intab\\
 & dec-list2.nestlevel=dec-list1.nestlevel\\
 & decl.intab=dec-list2.outtab\\
 & decl.nestlevel=dec-list2.nestlevel\\
 & decl-list1.outtab=decl.outtab\\
\hline
\emph{dec-list \(\to\) decl} & decl.intab = dec-list.intab\\
 & decl.nestlevel=dec-list.nestlevel\\
 & dec-list.outtab=decl.outtab\\
\hline
\emph{decl \(\to\) id = exp} & exp.symtab = decl.intab\\
 & exp.nestlevel=decl.nestlevel\\
 & decl.outtab =\\
 & if(decl.intab = errtab)or exp.err\\
 & then errtab\\
 & else\\
 & if (lookup(decl.intab, id.name)= decl.nestlevel)\\
 & then errtab\\
 & else\\
 & insert(decl.intab,id.name,decl.nestlevel)\\
\end{tabular}
\end{center}
\rmfamily

\subsection{Data types and type checking}
\label{sec:org8f574f2}
Type inference. Type checking

\subsubsection{type names, type declarations and recursive type}
\label{sec:orgdf13641}

\subsubsection{type equivalence}
\label{sec:org015c055}
two type expression represent the same type

\textbf{structural equivalence}: two types are the same if and only if they have the same structure

\textbf{name equivalence}: two type expressions are equivalent if and only if they are either the
the same simple type or are the same type name

\textbf{declaration equivalence}: weaker version of name equivalence. \(t2=t1\) are interpreted
as establishing type aliases rather than new types

\subsubsection{type inference and type checking}
\label{sec:org9e3fcef}

\subsubsection{additional topics in type checking}
\label{sec:orga04e2d4}
\begin{itemize}
\item \textbf{overloading}
\item \textbf{type conversion and coercion}
\end{itemize}
\section{Chap7 runtime environments}
\label{sec:orgcebb9d9}
\subsection{memory organization during program execution}
\label{sec:orgd8c9ffd}
\textbf{procedure activation record}
\begin{center}
\begin{tabular}{l}
space for arguments(parameters)\\
\hline
space for bookkeeping information, including return address\\
\hline
space for local data\\
\hline
space for local temporaries\\
\end{tabular}
\end{center}

\textbf{processor registers}
\begin{itemize}
\item part of the structure of the runtime environment
\item special-purpose registers
\begin{description}
\item[{PC}] program counter
\item[{SP}] stack pointer
\item[{FP}] frame pointer
\item[{AP}] argument pointer
\end{description}
\end{itemize}

\textbf{calling sequence}
\begin{enumerate}
\item the allocation of memory for the activation record
\item the computation and storing of the arguments
\item the storing and setting of necessary registers to affect the call
\end{enumerate}

\textbf{returning sequence}
\begin{enumerate}
\item the placing of the return value where it can be accessed by the caller
\item the readjustment of registers
\item the possible releasing for activation record memory
\end{enumerate}
\subsection{fully static runtime environment}
\label{sec:org3cfcbf4}
all data are static, remaining fixed in memory for the duration of program execution

\uline{No pointer or dynamic allocation. no recursive procedure calling}

entire program memory
\begin{itemize}
\item the global variables and all variables are allocated statically
\item each procedure has only a single activation record
\item all variables can be accessed directly via fixed address
\item no extra information about the environment needs to be kept in an
activation record
\end{itemize}

the calling sequence(simple)
\begin{enumerate}
\item each argument is computed and stored into its appropriate parameter location
in the activation of the procedure being called
\item the \textbf{return address} of the caller is saved
\item a jump is made to the begining of the code of the called procedure
\item on return, a simple jump is made to the return address
\end{enumerate}
\subsection{stack-based runtime environments}
\label{sec:org45de23e}
the stack of \textbf{activation records} grows and shrinks with the main of calls in
the executing program

Each procedure may have several \textbf{different activation records} on the call
stack at one time

In a language where \uline{all procedures are global}, the stack-based environment
requires two things
\begin{enumerate}
\item frame point, \texttt{fp}, a pointer to the current activation record to allow
access to local variable.

\textbf{control link} or \textbf{dynamic link}, a point to a record of the immediately
preceding activation
\item stack pointer, \texttt{sp}, a pointer to the last location allocated on the call stack
\end{enumerate}
\subsubsection{stack-based environments without local procedures}
\label{sec:org132c02a}
the calling sequence
\begin{enumerate}
\item compute the \emph{arguments} and store them in their correct positions in the
new activation record of the procedure.

because C parameters' order is reverse because of an indefinite number of
arguments
\item store the \emph{fp} as the control link in the new activation record
\item change the \emph{fp} s.t. it points to the beginning of the new activation
record
\item store the \emph{return address} in the new activation record
\item perform a \emph{jump} to the code of the procedure to be called
\end{enumerate}


when a procedure exits
\begin{enumerate}
\item copy the \emph{fp} to the \emph{sp}
\item load the control link into the \emph{fp}
\item perform a \emph{jump} to the return address
\item change the \emph{sp} to pop the arguments
\end{enumerate}
\end{document}